/* ******************************************************************************************************* */
/* **************************** Construct datasets on mutual fund characteristics **************************/
/* ******************************************************************************************************* */

proc sql;
  create table fundqtr as
  select a.crsp_cl_grp, b.crsp_fundno, b.caldt, b.mgr_name, b.exp_ratio, b.fund_name, b.mgmt_fee, b.mgmt_name, b.index_fund_flag, b.adv_name, b.ticker, 
    b.mgmt_cd as mgmt_no, b.per_cash as cash, b.per_com as stock, b.merge_fundno, b.retail_fund as ret_fund, b.inst_fund, b.et_flag, b.crsp_obj_cd,
    b.si_obj_cd, b.wbrger_obj_cd, b.lipper_obj_cd, b.lipper_asset_cd, b.lipper_class, b.first_offer_dt, b.actual_12b1, b.yield, b.fiscal_yearend, b.turn_ratio
  from crspmf.portnomap as a, crspmf.fund_summary2 as b
  where a.crsp_fundno = b.crsp_fundno and b.summary_period = 'Q' and year(caldt) >= 2000 and a.begdt <= b.caldt <= a.enddt;
quit;

data fund_fees; set crspmf.fund_fees;
  if fiscal_yearend = . then fiscal_yearend = begdt;
  /* If the variable fiscal yearend is present in the file, turnover is measured over the 12-month period ending on the fiscal yearend date; otherwise
     turnover is measured over the 12-month period ending on the date marked by the variable begdt */
  /* There are cases of duplicate (crsp_fundno fiscal_yearend) pairs 
     ==> use the begdt as the period end date wherever begdt is available */ 
  fiscal_yearend_ = fiscal_yearend;
  if begdt ne . then fiscal_yearend_ = begdt;
  format fiscal_yearend_ date9.;
  if turn_ratio <= -99 then turn_ratio = .;
  keep crsp_fundno fiscal_yearend fiscal_yearend_ turn_ratio exp_ratio;

proc sort nodupkey; by crsp_fundno fiscal_yearend_; where crsp_fundno ne . and fiscal_yearend_ ne .; run;

proc sql;
  create table fundqtr as
  select a.*, b.turn_ratio as turnover, b.exp_ratio as expenses, b.fiscal_yearend_
  from fundqtr as a left join fund_fees as b
  on a.crsp_fundno = b.crsp_fundno and 0 <= intck('month', b.fiscal_yearend_, a.caldt) <=12;
quit;

proc sort; by crsp_fundno caldt fiscal_yearend fiscal_yearend_; run;

data fundqtr; set fundqtr; by crsp_fundno caldt;
  if last.caldt;
  if turnover <= -99 then turnover = .;
  if turn_ratio <= -99 then turn_ratio = .;
  if expenses <= -99 then expenses = .;
  if exp_ratio <= -99 then exp_ratio = .;

proc sort data = crspmf.contact_info out = contact_info; by crsp_fundno chgdt chgenddt;
run;

proc sql;
  create table fundqtr as
  select a.*, b.city, b.state, b.zip
  from fundqtr as a left join contact_info as b
  on a.crsp_fundno = b.crsp_fundno and b.chgdt <= a.caldt <= b.chgenddt;
quit;

proc sql;
  create table fundqtr as
  select a.*, b.county_no_ as county_no, b.msa_no_ as msa_no, b.msa_name
  from fundqtr as a left join root.fs10_gpci_by_zip as b
  on a.zip = b.zip_code;
quit;

data monthly_tna; set crspmf.monthly_tna;
  if mtna <= -99 then mtna = .;
  if mtna ne .;

proc sql;
  create table fundqtr as
  select a.*, b.caldt as caldt2, b.mtna
  from fundqtr as a left join crspmf.monthly_tna as b
  on a.crsp_fundno = b.crsp_fundno and intck('quarter', b.caldt, a.caldt) = 0;
quit;

proc sort; by crsp_fundno caldt caldt2;

data fundqtr; set fundqtr; by crsp_fundno caldt caldt2;
  if last.caldt;
  drop caldt2;
run;

* merge with load information: use the maximum load fees;
* front load fees;
proc sort data = crspmf.front_load out = front_load; by crsp_fundno begdt enddt front_load;

data front_load; set front_load; by crsp_fundno begdt enddt;
  if last.enddt;
run;

proc sql;
  create table fundqtr1 as
  select a.*, b.front_load as frnt_ld
  from fundqtr as a left join front_load as b
  on a.crsp_fundno = b.crsp_fundno and b.begdt <= a.caldt <= b.enddt;
quit;

data fundno2wficn; set mflinks.mflink1;
  if crsp_fundno ne . and wficn ne .;

proc sql;
  create table fundqtr1 as
  select a.*, b.wficn
  from fundqtr1 as a left join fundno2wficn as b
  on a.crsp_fundno = b.crsp_fundno;
quit;

proc sort data = fundqtr1; by wficn crsp_fundno caldt frnt_ld;

data fundqtr1; set fundqtr1; by wficn crsp_fundno caldt;
  if last.caldt;
run;

* rear load fees;
proc sort data = crspmf.rear_load out = rear_load; by crsp_fundno begdt enddt rear_load;

data rear_load; set rear_load; by crsp_fundno begdt enddt;
  if last.enddt;
run;

proc sql;
  create table fundqtr1r as
  select a.*, b.rear_load as rear_ld
  from fundqtr1 as a left join rear_load as b
  on a.crsp_fundno = b.crsp_fundno and begdt <= a.caldt <= enddt;
quit;

proc sort data = fundqtr1r; by wficn crsp_fundno caldt rear_ld;

data fundqtr1r; set fundqtr1r; by wficn crsp_fundno caldt;
  if last.caldt;

proc sort data = fundqtr1r; by crsp_fundno caldt;

data fundqtr1x; set fundqtr1r; by crsp_fundno caldt;
  if msa_name = '' and city ne '' then msa_name = city;
  if turnover <= -99 then turnover = .;
  if frnt_ld <= -99 then frnt_ld = .;
  if rear_ld <= -99 then rear_ld = .;
  if expenses <= -99 then expenses = .;
  if actual_12b1 <= -99 then actual_12b1 = .;
  if yield <= -99 then yield = .;
  if mtna <= -99 then mtna = .;

  retain lfrnt_ld lrear_ld lexpenses ladv_name lticker lactual_12b1 lyield lmgmt_no lfund_name lmgmt_fee llipper_obj_cd lsi_obj_cd llipper_class lwbrger_obj_cd llipper_asset_cd lcrsp_obj_cd lindex_fund_flag let_flag;

  if frnt_ld ne . then lfrnt_ld=frnt_ld; 
  if rear_ld ne . then lrear_ld=rear_ld; 
  if expenses ne . then lexpenses=expenses; 
  if actual_12b1 ne . then lactual_12b1=actual_12b1; 
  if yield ne . then lyield=yield; 
  if mgmt_fee ne . then lmgmt_fee=mgmt_fee;
  if mgmt_no ne '' then lmgmt_no=mgmt_no;
  if lipper_obj_cd ne '' then llipper_obj_cd=lipper_obj_cd;
  if si_obj_cd ne '' then lsi_obj_cd=si_obj_cd;
  if lipper_class ne '' then llipper_class=lipper_class;
  if wbrger_obj_cd ne '' then lwbrger_obj_cd=wbrger_obj_cd;
  if lipper_asset_cd ne '' then llipper_asset_cd=lipper_asset_cd;
  if crsp_obj_cd ne '' then lcrsp_obj_cd=crsp_obj_cd;
  if index_fund_flag ne '' then lindex_fund_flag=index_fund_flag;
  if et_flag ne '' then let_flag=et_flag;
  if adv_name ne '' then ladv_name=adv_name;
  if ticker ne '' then lticker=ticker;

  if first.crsp_fundno and frnt_ld = . then lfrnt_ld = .; 
  if first.crsp_fundno and rear_ld = . then lrear_ld = .; 
  if first.crsp_fundno and expenses = . then lexpenses = .; 
  if first.crsp_fundno and actual_12b1 = . then lactual_12b1 = .; 
  if first.crsp_fundno and yield = . then lyield = .; 
  if first.crsp_fundno and mgmt_fee = . then lmgmt_fee = .; 
  if first.crsp_fundno and mgmt_no = '' then lmgmt_no = ''; 
  if first.crsp_fundno and lipper_obj_cd = '' then llipper_obj_cd = ''; 
  if first.crsp_fundno and si_obj_cd = '' then lsi_obj_cd = ''; 
  if first.crsp_fundno and lipper_class = '' then llipper_class = ''; 
  if first.crsp_fundno and wbrger_obj_cd = '' then lwbrger_obj_cd = ''; 
  if first.crsp_fundno and lipper_asset_cd = '' then llipper_asset_cd = ''; 
  if first.crsp_fundno and crsp_obj_cd = '' then lcrsp_obj_cd = ''; 
  if first.crsp_fundno and index_fund_flag = '' then lindex_fund_flag = ''; 
  if first.crsp_fundno and et_flag = '' then let_flag = ''; 
  if first.crsp_fundno and adv_name = '' then ladv_name = ''; 
  if first.crsp_fundno and ticker = '' then lticker = ''; 

  frnt_ld2=frnt_ld;
  rear_ld2=rear_ld;
  expenses2=expenses; 
  actual_12b12=actual_12b1;
  yield2=yield; 
  mgmt_fee2=mgmt_fee;
  mgmt_no2=mgmt_no;
  lipper_obj_cd2=lipper_obj_cd;
  si_obj_cd2=si_obj_cd;
  lipper_class2=lipper_class;
  wbrger_obj_cd2=wbrger_obj_cd;
  lipper_asset_cd2=lipper_asset_cd;
  crsp_obj_cd2=crsp_obj_cd;
  index_fund_flag2=index_fund_flag;
  et_flag2=et_flag;
  adv_name2=adv_name;
  ticker2=ticker;

  if not first.crsp_fundno and frnt_ld = . and lfrnt_ld ne . then frnt_ld2 = lfrnt_ld; 
  if not first.crsp_fundno and rear_ld = . and lrear_ld ne . then rear_ld2 = lrear_ld; 
  if not first.crsp_fundno and expenses = . and lexpenses ne . then expenses2 = lexpenses; 
  if not first.crsp_fundno and actual_12b1 = . and lactual_12b1 ne . then actual_12b12 = lactual_12b1; 
  if not first.crsp_fundno and yield = . and lyield ne . then yield2 = lyield; 
  if not first.crsp_fundno and mgmt_fee = . and lmgmt_fee ne . then mgmt_fee2 = lmgmt_fee; 
  if not first.crsp_fundno and mgmt_no = '' and lmgmt_no ne '' then mgmt_no2 = lmgmt_no; 
  if not first.crsp_fundno and lipper_obj_cd = '' and llipper_obj_cd ne '' then lipper_obj_cd2 = llipper_obj_cd; 
  if not first.crsp_fundno and si_obj_cd = '' and lsi_obj_cd ne '' then si_obj_cd2 = lsi_obj_cd; 
  if not first.crsp_fundno and lipper_class = '' and llipper_class ne '' then lipper_class2 = llipper_class; 
  if not first.crsp_fundno and wbrger_obj_cd = '' and lwbrger_obj_cd ne '' then wbrger_obj_cd2 = lwbrger_obj_cd; 
  if not first.crsp_fundno and lipper_asset_cd = '' and llipper_asset_cd ne '' then lipper_asset_cd2 = llipper_asset_cd; 
  if not first.crsp_fundno and crsp_obj_cd = '' and lcrsp_obj_cd ne '' then crsp_obj_cd2 = lcrsp_obj_cd; 
  if not first.crsp_fundno and index_fund_flag = '' and lindex_fund_flag ne '' then index_fund_flag2 = lindex_fund_flag; 
  if not first.crsp_fundno and et_flag = '' and let_flag ne '' then et_flag2 = let_flag; 
  if not first.crsp_fundno and adv_name = '' and ladv_name ne '' then adv_name2 = ladv_name; 
  if not first.crsp_fundno and ticker = '' and lticker ne '' then ticker2 = lticker; 

  if fund_name ne '' then lfund_name=fund_name;
  if first.crsp_fundno and fund_name = '' then lfund_name = ''; 
  fund_name2=fund_name;
  if not first.crsp_fundno and fund_name = '' and lfund_name ne '' then fund_name2 = lfund_name; 

  drop lfrnt_ld lrear_ld lexpenses ladv_name lticker lactual_12b1 lyield lmgmt_no lfund_name lmgmt_fee llipper_obj_cd lsi_obj_cd llipper_class lwbrger_obj_cd llipper_asset_cd lcrsp_obj_cd lindex_fund_flag let_flag;

proc sort data = fundqtr1x; by crsp_fundno descending caldt;

data fundqtr2; set fundqtr1x; by crsp_fundno descending caldt;
  retain lfrnt_ld lrear_ld lexpenses ladv_name lticker lactual_12b1 lmgmt_no lfund_name lmgmt_fee llipper_obj_cd lsi_obj_cd llipper_class lwbrger_obj_cd llipper_asset_cd lcrsp_obj_cd lindex_fund_flag let_flag;
  if frnt_ld ne . then lfrnt_ld=frnt_ld; 
  if rear_ld ne . then lrear_ld=rear_ld; 
  if expenses ne . then lexpenses=expenses; 
  if actual_12b1 ne . then lactual_12b1=actual_12b1; 
  if yield ne . then lyield=yield; 
  if mgmt_fee ne . then lmgmt_fee=mgmt_fee;
  if mgmt_no ne '' then lmgmt_no=mgmt_no;
  if lipper_obj_cd ne '' then llipper_obj_cd=lipper_obj_cd;
  if si_obj_cd ne '' then lsi_obj_cd=si_obj_cd;
  if lipper_class ne '' then llipper_class=lipper_class;
  if wbrger_obj_cd ne '' then lwbrger_obj_cd=wbrger_obj_cd;
  if lipper_asset_cd ne '' then llipper_asset_cd=lipper_asset_cd;
  if crsp_obj_cd ne '' then lcrsp_obj_cd=crsp_obj_cd;
  if index_fund_flag ne '' then lindex_fund_flag=index_fund_flag;
  if et_flag ne '' then let_flag=et_flag;
  if adv_name ne '' then ladv_name=adv_name;
  if ticker ne '' then lticker=ticker;
 
  if first.crsp_fundno and frnt_ld = . then lfrnt_ld = .; 
  if first.crsp_fundno and rear_ld = . then lrear_ld = .; 
  if first.crsp_fundno and expenses = . then lexpenses = .; 
  if first.crsp_fundno and actual_12b1 = . then lactual_12b1 = .; 
  if first.crsp_fundno and yield = . then lyield = .; 
  if first.crsp_fundno and mgmt_fee = . then lmgmt_fee = .; 
  if first.crsp_fundno and mgmt_no = '' then lmgmt_no = ''; 
  if first.crsp_fundno and lipper_obj_cd = '' then llipper_obj_cd = ''; 
  if first.crsp_fundno and si_obj_cd = '' then lsi_obj_cd = ''; 
  if first.crsp_fundno and lipper_class = '' then llipper_class = ''; 
  if first.crsp_fundno and wbrger_obj_cd = '' then lwbrger_obj_cd = ''; 
  if first.crsp_fundno and lipper_asset_cd = '' then llipper_asset_cd = ''; 
  if first.crsp_fundno and crsp_obj_cd = '' then lcrsp_obj_cd = ''; 
  if first.crsp_fundno and index_fund_flag = '' then lindex_fund_flag = ''; 
  if first.crsp_fundno and et_flag = '' then let_flag = ''; 
  if first.crsp_fundno and adv_name = '' then ladv_name = ''; 
  if first.crsp_fundno and ticker = '' then lticker = ''; 

  if not first.crsp_fundno and frnt_ld = . and lfrnt_ld ne . then frnt_ld2 = lfrnt_ld; 
  if not first.crsp_fundno and rear_ld = . and lrear_ld ne . then rear_ld2 = lrear_ld; 
  if not first.crsp_fundno and expenses = . and lexpenses ne . then expenses2 = lexpenses; 
  if not first.crsp_fundno and actual_12b1 = . and lactual_12b1 ne . then actual_12b12 = lactual_12b1; 
  if not first.crsp_fundno and yield = . and lyield ne . then yield2 = lyield; 
  if not first.crsp_fundno and mgmt_fee = . and lmgmt_fee ne . then mgmt_fee2 = lmgmt_fee; 
  if not first.crsp_fundno and mgmt_no = '' and lmgmt_no ne '' then mgmt_no2 = lmgmt_no; 
  if not first.crsp_fundno and lipper_obj_cd = '' and llipper_obj_cd ne '' then lipper_obj_cd2 = llipper_obj_cd; 
  if not first.crsp_fundno and si_obj_cd = '' and lsi_obj_cd ne '' then si_obj_cd2 = lsi_obj_cd; 
  if not first.crsp_fundno and lipper_class = '' and llipper_class ne '' then lipper_class2 = llipper_class; 
  if not first.crsp_fundno and wbrger_obj_cd = '' and lwbrger_obj_cd ne '' then wbrger_obj_cd2 = lwbrger_obj_cd; 
  if not first.crsp_fundno and lipper_asset_cd = '' and llipper_asset_cd ne '' then lipper_asset_cd2 = llipper_asset_cd; 
  if not first.crsp_fundno and crsp_obj_cd = '' and lcrsp_obj_cd ne '' then crsp_obj_cd2 = lcrsp_obj_cd; 
  if not first.crsp_fundno and index_fund_flag = '' and lindex_fund_flag ne '' then index_fund_flag2 = lindex_fund_flag; 
  if not first.crsp_fundno and et_flag = '' and let_flag ne '' then et_flag2 = let_flag; 
  if not first.crsp_fundno and adv_name = '' and ladv_name ne '' then adv_name2 = ladv_name; 
  if not first.crsp_fundno and ticker = '' and lticker ne '' then ticker2 = lticker; 

  if fund_name ne '' then lfund_name=fund_name;
  if first.crsp_fundno and fund_name = '' then lfund_name = ''; 
  if not first.crsp_fundno and fund_name = '' and lfund_name ne '' then fund_name2 = lfund_name; 

  drop lfrnt_ld lrear_ld lexpenses ladv_name lticker lactual_12b1 lyield lmgmt_no lfund_name lmgmt_fee llipper_obj_cd lsi_obj_cd llipper_class lwbrger_obj_cd llipper_asset_cd lcrsp_obj_cd lindex_fund_flag let_flag;
 
  if frnt_ld2 = . then frnt_ld2 = 0; 
  if rear_ld2 = . then rear_ld2 = 0; 
  if mgmt_fee2 = . then mgmt_fee2 = 0; 
  if frnt_ld = . and rear_ld ne . then frnt_ld = 0;
  if frnt_ld ne . and rear_ld = . then rear_ld = 0;
  total_loads = frnt_ld+rear_ld;
  total_loads2 = frnt_ld2+rear_ld2;

  if index_fund_flag in ('D') or rxmatch("index", fund_name) > 0 or rxmatch("idx", fund_name) > 0 then index_fund = 1; else index_fund = 0; * B � Index-based fund;  * D � Index Fund;  * E � Index Fund Enhanced;
  if index_fund_flag2 in ('D') or rxmatch("index", fund_name) > 0 or rxmatch("idx", fund_name) > 0 then index_fund2 = 1; else index_fund2 = 0; * B � Index-based fund;  * D � Index Fund;  * E � Index Fund Enhanced;
  if et_flag in ('F', 'N') then et = 1; else et = 0; 
  if et_flag2 in ('F', 'N') then et2 = 1; else et2 = 0; 
  if inst_fund = 'Y' then institutional_fund = 1; else institutional_fund = 0;
  if ret_fund = 'Y' then retail_fund = 1; else retail_fund = 0;
  year = year(caldt);



********************************************************************************************************************************************;
************************************************Start standardizing management company name ************************************************;
********************************************************************************************************************************************;
proc sql;
  create table mgmt_cd as
  select a.crsp_cl_grp, b.crsp_fundno, b.caldt, b.fund_name, b.mgmt_name, b.mgmt_cd as mgmt_no, b.adv_name
  from crspmf.portnomap as a, crspmf.fund_summary2 as b
  where a.crsp_fundno = b.crsp_fundno and b.summary_period = 'Q' and year(caldt) >= 2002 and a.begdt <= b.caldt <= a.enddt;
quit;

proc contents data = mgmt_cd; run;

proc sort data = mgmt_cd nodupkey; by crsp_fundno caldt;

data mgmt_cd; set mgmt_cd; by crsp_fundno caldt;
  retain lmgmt_no;
  if mgmt_no ne '' then lmgmt_no=mgmt_no;
  if first.crsp_fundno and mgmt_no = '' then lmgmt_no = ''; 
  mgmt_no2=mgmt_no;
  if not first.crsp_fundno and mgmt_no = '' and lmgmt_no ne '' then mgmt_no2 = lmgmt_no; 
  drop lmgmt_no;
 
  retain lmgmt_name;
  if mgmt_name ne '' then lmgmt_name=mgmt_name;
  if first.crsp_fundno and mgmt_name = '' then lmgmt_name = ''; 
  mgmt_name2=mgmt_name;
  if not first.crsp_fundno and mgmt_name = '' and lmgmt_name ne '' then mgmt_name2 = lmgmt_name; 
  drop lmgmt_name;

  retain ladv_name;
  if adv_name ne '' then ladv_name=adv_name;
  if first.crsp_fundno and adv_name = '' then ladv_name = ''; 
  adv_name2=adv_name;
  if not first.crsp_fundno and adv_name = '' and ladv_name ne '' then adv_name2 = ladv_name; 
  drop ladv_name;



proc sort data = mgmt_cd; by crsp_fundno descending caldt; run;

data temp.mgmt_cd; set mgmt_cd; by crsp_fundno descending caldt;
  retain lmgmt_no;
  if mgmt_no ne '' then lmgmt_no=mgmt_no;
  if first.crsp_fundno and mgmt_no = '' then lmgmt_no = ''; 
  if not first.crsp_fundno and mgmt_no = '' and lmgmt_no ne '' then mgmt_no2 = lmgmt_no; 
  drop lmgmt_no;

  retain lmgmt_name;
  if mgmt_name ne '' then lmgmt_name=mgmt_name;
  if first.crsp_fundno and mgmt_name = '' then lmgmt_name = ''; 
  if not first.crsp_fundno and mgmt_name = '' and lmgmt_name ne '' then mgmt_name2 = lmgmt_name; 
  drop lmgmt_name;

  retain ladv_name;
  if adv_name ne '' then ladv_name=adv_name;
  if first.crsp_fundno and adv_name = '' then ladv_name = ''; 
  if not first.crsp_fundno and adv_name = '' and ladv_name ne '' then adv_name2 = ladv_name; 
  drop ladv_name;
 
******************************************* Standardize management company code *******************************************;
  mgmt_name = compbl(compress(upcase(mgmt_name),,'p'));
  if mgmt_no2 in ("BLI", "BZW", "BAA") then mgmt_no2 = "BLK"; * Blackrock has multiple entries;
  if mgmt_no2 in ("BZW") and caldt >= '1dec2009'd then mgmt_no2 = "BLK"; * BlackRock acquired BGI;
  if mgmt_no2 in ("SSC", "SSI", "SSR") then mgmt_no2 = "SSB"; * State Stree Corp has multiple entries;
  if mgmt_no2 in ("SAM") and caldt >= '1jan1999'd then mgmt_no2 = "AIG"; * AIG acquired SunAmerica;
  if mgmt_no2 in ("JHI") then mgmt_no2 = "JHA"; * John Hancock has multiple entries;
  if mgmt_no2 in ("FMT", "FDS") then mgmt_no2 = "FID"; * Fidelity Management Trust Company;
  if mgmt_no2 in ("HLD") then mgmt_no2 = "MEL"; * MERRILL LYNCH has multiple entries;
  if mgmt_no2 in ("MEL") and caldt >= '1jan2009'd then mgmt_no2 = "AIG"; * AIG acquired SunAmerica;
  if mgmt_no2 in ("TRN", "AEG") then mgmt_no2 = "TAI"; * TRANSAMERICA FUNDS has multiple entries, AEGON Management Company is a sub;
  if mgmt_no2 in ("MGC", "SCD", "DBL", "DIM", "DBX", "XAL") then mgmt_no2 = "DWS"; * DEUTSCHE BANK has multiple entries;
  if mgmt_no2 in ("SCU") and caldt >= '8apr2002'd then mgmt_no2 = "DWS"; * April 08, 2002: Deutsche Bank today completed its acquisition of Zurich Scudder;
  if mgmt_no2 in ("CLY", "SMC", "RYD", "GPA") then mgmt_no2 = "GIM"; * GUGGENHEIM PARTNERS has multiple entries;
  if index(mgmt_name, 'BOSTON ADVISORS') = 1 and mgmt_no2 = 'BLK' then mgmt_no2 = 'BST'; * BOSTON ADVISORS;
  if mgmt_no2 in ("BAV") then mgmt_no2 = "BAF"; * BROWN ADVISORY FUNDS has multiple entries;
  if mgmt_no2 in ("PMU") then mgmt_no2 = "PLI"; * PACIFIC LIFE GROUP has multiple entries;
  if mgmt_no2 in ("DWI") then mgmt_no2 = "MST"; * MORGAN STANLEY has multiple entries;
  if mgmt_no2 in ("ORI") then mgmt_no2 = "ORD"; * OAK RIDGE INVESTMENTS has multiple entries;
  if mgmt_no2 in ("JDR") then mgmt_no2 = "PRU"; * PRUDENTIAL INVESTMENTS LLC has multiple entries;
  if mgmt_no2 in ("TPF") then mgmt_no2 = "TUP"; * THOMPSON PLUMB FUNDS has multiple entries;
  if index(mgmt_name, 'ABERDEEN') = 1 and mgmt_no2 in ("ESA") then mgmt_no2 = "ABR"; * ABERDEEN has multiple entries;
  if index(mgmt_name, 'AIM') = 1 and mgmt_no2 in ("AIN") then mgmt_no2 = "AIM"; * ABERDEEN has multiple entries;
  if mgmt_no2 in ("NCC") and caldt >= '31dec2008'd then mgmt_no2 = "PNC"; *The PNC Financial Services Group, Inc. announced today that it completed its acquisition of National City Corporation;
  if index(mgmt_name, 'AIM') = 1 and mgmt_no2 in ("ALE") and caldt >= '31dec2008'd then mgmt_no2 = "PNC"; * ABERDEEN has multiple entries;
  if mgmt_no2 in ("MSD") and caldt >= '2mar2007'd then mgmt_no2 = "PNC"; * The PNC Financial Services Group, Inc. (NYSE: PNC) announced today that it has completed its acquisition of Mercantile Bankshares Corporation;
  if mgmt_no2 in ("AAS") then mgmt_no2 = "ATM"; * ASTOR INVESTMENT MANAGEMENT LLC has multiple entries;
  if mgmt_no2 in ("EQL", "RSN") then mgmt_no2 = "AXA"; * AXA has multiple entries;
  if mgmt_no2 in ("CRG") then mgmt_no2 = "CAL"; * CALVERT FUNDS has multiple entries;
  if mgmt_no2 in ("ROC") then mgmt_no2 = "CNB"; * CITY NATIONAL has multiple entries;  if mgmt_no2 in ("CLK") then mgmt_no2 = "CCG"; * CLARK CAPITAL MANAGEMENT has multiple entries;
  if mgmt_no2 in ("CLC") then mgmt_no2 = "CLY"; * CLAYMORE SECURITIES INC has multiple entries;
  if mgmt_no2 in ("CLY") and caldt >= '15dec2009'd then mgmt_no2 = "GIM"; * Guggenheim Partners, LLC an and Claymore Group Inc. announce the completion of a merger;
  if mgmt_no2 in ("COL", "LBR", "CLB") then mgmt_no2 = "COL"; * COLUMBIA FUNDS has multiple entries;
  if mgmt_no2 in ("COL") and caldt >= '3may2010'd then mgmt_no2 = "AMP"; * Ameriprise Financial Completes Columbia Management Acquisition;
  if index(mgmt_name, 'AMERIPRISE') = 1 and mgmt_no2 in ("AXP") then mgmt_no2 = "AMP"; * ;
  if index(mgmt_name, 'RIVERSOURCE') = 1 and mgmt_no2 in ("AXP") then mgmt_no2 = "AMP"; * ;
  if index(mgmt_name, 'COLUMBIA') = 1 and mgmt_no2 in ("AXP") then mgmt_no2 = "AMP"; * ;
  if mgmt_no2 in ("CWI") then mgmt_no2 = "MBI"; * CUTWATER has multiple entries;


  if mgmt_no2 in ("DLC") then mgmt_no2 = "DLF"; * DOUBLELINE CAPITAL LP has multiple entries;
  if index(mgmt_name, 'DUNDEEWEALTH') = 1 and mgmt_no2 in ("SCT") then mgmt_no2 = "BHR"; * DundeeWealth has multiple entries;
  if mgmt_no2 in ("FQI") and caldt >= '13dec2011'd then mgmt_no2 = "EAM"; * Envestnet Completes FundQuest Acquisition ;
  if mgmt_no2 in ("EQI") then mgmt_no2 = "EQM"; * EQUINOX FUND MANAGEMENT LLC has multiple entries;
  if mgmt_no2 in ("ETG") then mgmt_no2 = "EMG"; * ETF MANAGERS CAPITAL LLC has multiple entries;
  if mgmt_no2 in ("ESA") then mgmt_no2 = "ETS"; * ETF SECURITIES has multiple entries;
  if mgmt_no2 in ("EPH") then mgmt_no2 = "EUP"; * EURO PACIFIC ASSET MANAGEMENT LLC has multiple entries;
  if mgmt_no2 in ("GLD") then mgmt_no2 = "GSC"; * GOLDMAN SACHS CO has multiple entries;
  if mgmt_no2 in ("AIP") and caldt >= '30aug2009'd then mgmt_no2 = "HCI"; * Hatteras Funds to acquire Alternative Investment Partners;
  if mgmt_no2 in ("HEN") then mgmt_no2 = "HNF"; * HENNESSY FUNDS has multiple entries;
  if mgmt_no2 in ("HNL") then mgmt_no2 = "HEF"; * HENSSLER EQUITY FUND has multiple entries;

  if mgmt_no2 in ("HFM") then mgmt_no2 = "HCM"; * HIGHLAND CAPITAL MANAGEMENT LP has multiple entries;
  if mgmt_no2 in ("HOW") then mgmt_no2 = "HWD"; * HOWARD CAPITAL MANAGEMENT INC has multiple entries;
  if mgmt_no2 in ("IVR") then mgmt_no2 = "INN"; * INNOVATOR MANAGEMENT LLC has multiple entries;
  if mgmt_no2 in ("ICA", "IVS") then mgmt_no2 = "AIM"; * INVESCO AIM has multiple entries;
  if mgmt_no2 in ("PWR") and caldt >= '18sep2006'd then mgmt_no2 = "AIM"; * Invesco acquired PowerShares Capital Management LLC;
  if mgmt_no2 in ("JAL") then mgmt_no2 = "JAA"; * JAMES ALPHA ADVISORS LLC has multiple entries;
  if mgmt_no2 in ("MOR") then mgmt_no2 = "JPM"; * JPMORGAN FUNDS has multiple entries;
  if index(mgmt_name, 'JPM') = 1 and mgmt_no2 in ("BCO") then mgmt_no2 = "JPM"; *  has multiple entries;
  if mgmt_no2 in ("LDR") then mgmt_no2 = "LCC"; * LEADER CAPITAL CORPORATION has multiple entries;
  if mgmt_no2 in ("LMI") then mgmt_no2 = "LMD"; * LEE MUNDER CAPITAL GROUP LLC has multiple entries;
  if mgmt_no2 in ("LEG") then mgmt_no2 = "LGW"; * LEGG MASON has multiple entries;
  if mgmt_no2 in ("BFA", "MAD") then mgmt_no2 = "MAA"; * MADISON ASSET MANAGEMENT LLC has multiple entries;
  if mgmt_no2 in ("MTD") then mgmt_no2 = "MKA"; * MARKETFIELD ASSET MANAGEMENT LLC has multiple entries;
  if mgmt_no2 in ("MYR") then mgmt_no2 = "MCM"; * MEYERS CAPITAL MANAGEMENT GROUP LLC has multiple entries;
  if mgmt_no2 in ("GMS") then mgmt_no2 = "NMM"; * NAVIGATOR MONEY MANAGEMENT INC has multiple entries;
  if mgmt_no2 in ("NCM") then mgmt_no2 = "NFM"; * NEIMAN FUNDS MANAGEMENT LLC has multiple entries;
  if mgmt_no2 in ("NEB", "NBI") then mgmt_no2 = "NEU"; * NEUBERGER BERMAN LLC has multiple entries;
  if mgmt_no2 in ("ORI") then mgmt_no2 = "ORN"; * ORINDA FUNDS has multiple entries;
  if mgmt_no2 in ("OWC") then mgmt_no2 = "OST"; * OSTERWEIS CAPITAL MANAGEMENT INC has multiple entries;
  if mgmt_no2 in ("PNL") then mgmt_no2 = "PNA"; * PINNACLE ADVISORS LLC has multiple entries;
  if mgmt_no2 in ("ANM") then mgmt_no2 = "PNE"; * PIONEER INVESTMENT MANAGEMENT INC has multiple entries;
  if mgmt_no2 in ("PRF") then mgmt_no2 = "PFS"; * PROFUNDS GROUP has multiple entries;
  if mgmt_no2 in ("RNT") then mgmt_no2 = "RTA"; * REICH TANG ASSET MANAGEMENT has multiple entries;
  if mgmt_no2 in ("TRU") then mgmt_no2 = "STI"; * RIDGEWORTH FUNDS has multiple entries;
  if mgmt_no2 in ("STI") and caldt >= '1jun2017'd then mgmt_no2 = "VIR"; * Virtus Completes Acquisition of RidgeWorth Investments;
  if mgmt_no2 in ("VEA", "PHX", "VRI") then mgmt_no2 = "VIR"; * VIRTUS INVESTMENT PARTNERS INC has multiple entries;
  if mgmt_no2 in ("RNC") then mgmt_no2 = "RNG"; * RNC CAPITAL MANAGEMENT has multiple entries;
  if index(mgmt_name, 'SCHRODER') = 1 and mgmt_no2 in ("SCD") then mgmt_no2 = "SCH"; * ABERDEEN has multiple entries;
  if index(mgmt_name, 'SEGALL BRYANT HAMILL') = 1 then mgmt_no2 = "SBH"; * ABERDEEN has multiple entries;
  if mgmt_no2 in ("SET") then mgmt_no2 = "SEI"; * SEI GROUP has multiple entries;
  if mgmt_no2 in ("SLG") then mgmt_no2 = "SEL"; * SELIGMAN FUNDS has multiple entries;
  if mgmt_no2 in ("SEL") and caldt >= '7nov2008'd then mgmt_no2 = "AMP"; * Ameriprise Financial Completes Acquisition of J. & W. Seligman & Co.;

  if mgmt_no2 in ("SKF") then mgmt_no2 = "SKY"; * SKYLINE ASSET MANAGEMENT LP has multiple entries;
  if mgmt_no2 in ("STL") and caldt >= '16feb2005'd then mgmt_no2 = "BBT"; * BB&T Buys Sterling Capital Stake;
  if mgmt_no2 in ("AAL") then mgmt_no2 = "ALU"; * THRIVENT INVESTMENT MANAGEMENT INC has multiple entries;
  if mgmt_no2 in ("CIG") then mgmt_no2 = "TMS"; * TIMESSQUARE CAPITAL MANAGEMENT LLC has multiple entries;
  if mgmt_no2 in ("TCS", "TIS") then mgmt_no2 = "TRT"; * TORTOISE CAPITAL ADVISORS LLC has multiple entries;
  if mgmt_no2 in ("PWA") then mgmt_no2 = "UBS"; * UBS has multiple entries;
  if mgmt_no2 in ("VUI") then mgmt_no2 = "VAM"; * VONTOBEL ASSET MANAGEMENT INC has multiple entries;
  if mgmt_no2 in ("IVY") then mgmt_no2 = "WNR"; * WADDELL REED INVESTMENT MGMT COMPANY has multiple entries;
  if mgmt_no2 in ("MTB") then mgmt_no2 = "WIL"; * WILMINGTON has multiple entries;
  if mgmt_no2 in ("LOT") then mgmt_no2 = "ZNC"; * ZIEGLER CAPITAL MANAGEMENT LLC has multiple entries;
  year = year(caldt);

proc sort; by mgmt_name;

proc sort data = temp.mgmt_cd out = mgmt_cd_unique (keep=mgmt_name mgmt_no2 year) nodupkey; by mgmt_name mgmt_no2 year; where mgmt_no2 ne '' and mgmt_name ne '';

data mgmt_cd1a; set temp.mgmt_cd;
  if mgmt_no2 ne '';

data temp; set temp.mgmt_cd;
  if mgmt_no2 = '';
  drop mgmt_no2;

proc sql;
  create table temp as
  select a.*, b.mgmt_no2
  from temp as a left join mgmt_cd_unique as b
  on a.mgmt_name = b.mgmt_name and a.year = b.year;
quit;

data mgmt_cd1b; set temp;
  if mgmt_no2 ne '';
  if mgmt_no2 in ("COL") and caldt >= '3may2010'd then mgmt_no2 = "AMP"; * Ameriprise Financial Completes Columbia Management Acquisition;

proc sort nodupkey; by crsp_fundno caldt;

data temp1; set temp;
  if mgmt_no2 = '';
  drop mgmt_no2;

proc sql;
  create table temp1 as
  select a.*, b.mgmt_no2, abs(a.year-b.year) as diff
  from temp1 as a left join mgmt_cd_unique as b
  on a.mgmt_name = b.mgmt_name;
quit;

proc sort; by crsp_fundno caldt diff;

data temp1; set temp1; by crsp_fundno caldt;
  if first.caldt;
run;

data mgmt_cd1c; set temp1;
  if mgmt_no2 ne '';

proc sort nodupkey; by crsp_fundno caldt;

data temp1; set temp1;
  if mgmt_no2 = '';
  mgmt_no2 = substr(mgmt_name, 1, 3);
  if mgmt_name = 'AIG SUNAMERICA RETIREMENT MARKETS' then mgmt_no2 = 'AIG';
  if mgmt_name = 'AMERICAN UNITED LIFE INSURANCE COMPANY' then mgmt_no2 = 'AUL';
  if mgmt_name = 'AST INVESTMENT SERVICES INC' then mgmt_no2 = 'ASTI';
  if mgmt_name = 'BRIGHTHOUSE INVESTMENT ADVISERS LLC' then mgmt_no2 = 'BRII';
  if mgmt_name = 'COLUMBIA WANGER ASSET MANAGEMENT LLC' then mgmt_no2 = 'CWA';
  if mgmt_name = 'COLUMBIA WANGER ASSET MANAGEMENT LP' then mgmt_no2 = 'CWA';
  if mgmt_name = 'DIRECTED SERVICES INC' then mgmt_no2 = 'DSI';
  if mgmt_name = 'DIRECTED SERVICES LLC' then mgmt_no2 = 'DSI';
  if mgmt_name = 'EQUITABLE FINANCIAL LIFE INSURANCE CO' then mgmt_no2 = 'EFL';
  if mgmt_name = 'FRANKLIN ALT STRATEGIES ADVIERS LLC' then mgmt_no2 = 'FRAA';
  if mgmt_name = 'GLOBAL ATLANTIC INVESTMENT ADVISORS LLC' then mgmt_no2 = 'GAI';
  if mgmt_name = 'JANUS ASPEN SERIES' then mgmt_no2 = 'JAS';
  if mgmt_name = 'PARK AVENUE INSTITUTIONAL ADVISERS LLC' then mgmt_no2 = 'PAIA';
  if mgmt_name = 'SYMETRA INVESTMENT MANAGEMENT INC' then mgmt_no2 = 'SYMI';
  if mgmt_name = 'VALMARK ADVISERS INC' then mgmt_no2 = 'VALM';

data mgmt_cd1d; set temp1;
  if mgmt_no2 ne '';

data mgmt_cd1e; set temp1;
  if mgmt_no2 = '';
  mgmt_name2 = compbl(compress(upcase(mgmt_name2),,'p'));

proc sql;
  create table mgmt_cd1e as
  select a.*, b.mgmt_no2, abs(a.year-b.year) as diff
  from mgmt_cd1e (drop=mgmt_no2 diff) as a left join mgmt_cd_unique as b
  on a.mgmt_name2 = b.mgmt_name;
quit;

proc sort; by crsp_fundno caldt diff;

data mgmt_cd1e; set mgmt_cd1e; by crsp_fundno caldt;
  if first.caldt;
run;

data temp.mgmt_cd; set mgmt_cd1a mgmt_cd1b mgmt_cd1c mgmt_cd1d mgmt_cd1e;
  if mgmt_no2 ne ''; 
  drop diff;
run;

********************************************************************************************************************************************;
*********************************************** End of standardizing management company name ***********************************************;
********************************************************************************************************************************************;




proc sql;
  create table fundqtr2 as
  select a.*, b.mgmt_no2
  from fundqtr2 (drop=mgmt_no2) as a left join temp.mgmt_cd as b
  on a.crsp_fundno = b.crsp_fundno and a.caldt = b.caldt;
quit; 

proc sort data = fundqtr2 (keep=mgmt_name year mgmt_no2 crsp_fundno) nodupkey out = mgmt_no; by mgmt_name year mgmt_no2 crsp_fundno; where mgmt_name ne '' and mgmt_no2 ne '';

proc means noprint data = mgmt_no; by mgmt_name year mgmt_no2;
  var crsp_fundno;
  output out = mgmt_no1 (drop=_type_ _freq_) n = num_funds;

proc sort; by mgmt_name year descending num_funds;

data mgmt_no1; set mgmt_no1; by mgmt_name year descending num_funds;
  if first.year;

proc sql;
  create table fundqtr2 as
  select a.*, b.mgmt_no2 as mgmt_no2b
  from fundqtr2 as a left join mgmt_no1 as b
  on a.mgmt_name = b.mgmt_name and a.year = b.year;
quit;

data fundqtr_info; set fundqtr2;
  if mgmt_no2b ne '' and mgmt_no2 = '' then filled = "Same management company name in the same year";
  if mgmt_no2b ne '' then mgmt_no2 = mgmt_no2b;
  drop mgmt_no2b; 
run;
******************************************* Standardize management company code *******************************************;



***************************** Fill in missing management company information  ***************************************************************************************;
data temp1a; set fundqtr_info;
  if mgmt_no2 = ''; 
  firstword = substr(fund_name2, 1, anyspace(fund_name2, 1)-1);
  year = year(caldt);
  drop mgmt_no2;

proc sort nodupkey out = list (keep=firstword year); by firstword year;
run; * funds with missing management company information;


data temp1b; set fundqtr_info;
  if mgmt_no2 ne ''; 
  firstword = substr(fund_name2, 1, anyspace(fund_name2, 1)-1);
  year = year(caldt);

proc sort nodupkey out = fill (keep=firstword year mgmt_no2); by firstword year mgmt_no2;
run; * funds with nonmissing management company information;

proc sql;
  create table list1 as
  select a.firstword, a.year, b.mgmt_no2
  from list as a left join fill as b
  on a.firstword = b.firstword and a.year = b.year;
quit;

proc sort; by firstword year;

proc means noprint; by firstword year;
  var year;
  output out = num (drop=_type_ _freq_) n = num;

data keep; merge list1 num; by firstword year;
  if num = 1 and mgmt_no2 ne '';
  drop num;
run; * The first word of the fund family is unique;

proc sql;
  create table temp1a as
  select a.*, b.mgmt_no2
  from temp1a as a left join keep as b
  on a.firstword = b.firstword and a.year = b.year;
quit;

data temp1; set temp1a (in=a) temp1b;
  if a and mgmt_no2 ne '' then filled = "Same unique first word of fund name in the same year   ";
  drop firstword year;
run;


data temp1c; set temp1;
  if mgmt_no2 = ''; 
  space1 = anyspace(fund_name2, 1);
  space2 = anyspace(fund_name2, space1+1);
  first2words = substr(fund_name2, 1, space2-1);
  year = year(caldt);
  drop mgmt_no2 space1 space2;

proc sort nodupkey out = list (keep=first2words year); by first2words year;
run;


data temp1d; set temp1;
  if mgmt_no2 ne ''; 
  space1 = anyspace(fund_name2, 1);
  space2 = anyspace(fund_name2, space1+1);
  first2words = substr(fund_name2, 1, space2-1);
  year = year(caldt);
  drop space1 space2;

proc sort nodupkey out = fill (keep=first2words year mgmt_no2); by first2words year mgmt_no2;
run;

proc sql;
  create table list1 as
  select a.first2words, a.year, b.mgmt_no2
  from list as a left join fill as b
  on a.first2words = b.first2words and a.year = b.year;
quit;

proc sort nodupkey; by first2words year mgmt_no2;

proc means noprint; by first2words year;
  var year;
  output out = num (drop=_type_ _freq_) n = num;

data keep; merge list1 num; by first2words year;
  if num = 1 and mgmt_no2 ne '';
  drop num;

proc sql;
  create table temp1c as
  select a.*, b.mgmt_no2
  from temp1c as a left join keep as b
  on a.first2words = b.first2words and a.year = b.year;
quit;

data temp2; set temp1c (in=a) temp1d;
  drop first2words;
  if a and mgmt_no2 ne '' then filled = "Same unique first two words of fund name in the same yr";
run;

proc freq data = temp2; tables filled; 
run;



data temp1e; set temp2;
  if mgmt_no2 = ''; 
  if index(fund_name2, ":") > 0 then series = substr(fund_name2, 1, index(fund_name2, ":")-1);
  if index(fund_name2, ":") = 0 then series = substr(fund_name2, 1, index(fund_name2, ";")-1);
  year = year(caldt);
  drop mgmt_no2;

proc sort nodupkey out = list (keep=series year); by series year;
run;


data temp1f; set temp2;
  if mgmt_no2 ne ''; 
  if index(fund_name2, ":") > 0 then series = substr(fund_name2, 1, index(fund_name2, ":")-1);
  if index(fund_name2, ":") = 0 then series = substr(fund_name2, 1, index(fund_name2, ";")-1);
  year = year(caldt);

proc sort nodupkey out = fill (keep=series year mgmt_no2); by series year mgmt_no2;
run;

proc sql;
  create table list1 as
  select a.series, a.year, b.mgmt_no2
  from list as a left join fill as b
  on a.series = b.series and a.year = b.year;
quit;

proc sort nodupkey; by series year mgmt_no2;

proc means noprint; by series year;
  var year;
  output out = num (drop=_type_ _freq_) n = num;

data keep; merge list1 num; by series year;
  if num = 1 and mgmt_no2 ne '';
  drop num;

proc sql;
  create table temp1e as
  select a.*, b.mgmt_no2
  from temp1e as a left join keep as b
  on a.series = b.series and a.year = b.year;
quit;

data fundqtr_info; set temp1e (in=a) temp1f (in=b); 
  if a and mgmt_no2 ne '' then filled = "Same unique first word of series name in the same year";
  if a and mgmt_no2 = '' then filled = "Not filled";
  if b and filled = '' then filled = "Not necessary";
  drop series year; 

proc freq; tables filled;
run;


proc sort data = crspmf.Monthly_tna_ret_nav out = fund_age (keep=crsp_fundno caldt); by crsp_fundno caldt; where mtna ne . or mret ne .;

data fund_age; set fund_age; by crsp_fundno;
  if first.crsp_fundno;
  first_obs_dt = caldt;
  format first_obs_dt date9.;
  drop caldt;

proc sort data = fundqtr_info; by crsp_fundno;

data fundqtr_info; merge fundqtr_info (in=a) fund_age; by crsp_fundno;
  if a;
  fund_age = intck('month', first_obs_dt, caldt);
run;
***************************** END OF Fill in missing management company information  ***************************************************************************************;


proc sort data = fundqtr_info; by wficn crsp_cl_grp caldt; 

data uniq; set fundqtr_info; by wficn crsp_cl_grp caldt;
  if first.caldt and last.caldt;
  keep wficn crsp_fundno crsp_cl_grp fund_name caldt adv_name ticker fund_age msa_name mtna expenses frnt_ld rear_ld mgmt_no turnover mgmt_fee actual_12b1 expenses2 frnt_ld2 rear_ld2 mgmt_no2 turnover mgmt_fee2 actual_12b12 lipper_obj_cd si_obj_cd lipper_class wbrger_obj_cd lipper_asset_cd fiscal_yearend
    crsp_obj_cd crsp_obj_cd2 lipper_obj_cd2 si_obj_cd2 lipper_class2 wbrger_obj_cd2 lipper_asset_cd2 yield yield2 stock mgr_name index_fund index_fund2 institutional_fund retail_fund et et2; *team_managed sole_managed co_managed anonymously_managed;
run;

data multiple; set fundqtr_info; by wficn crsp_cl_grp caldt;
  if not (first.caldt and last.caldt);
run;

proc means data = multiple noprint; by wficn crsp_cl_grp caldt;
  var mtna; where mtna ne .;
  output out = sum2 (drop=_type_ _freq_) sum(mtna)=mtna; 
run;

proc means data = multiple noprint; by wficn crsp_cl_grp caldt;
  var yield yield2 expenses frnt_ld rear_ld turnover mgmt_fee expenses2 frnt_ld2 rear_ld2 mgmt_fee2 actual_12b1 actual_12b12 stock institutional_fund retail_fund; weight mtna; where mtna ne .;
  output out = mean2 (drop=_type_ _freq_) mean=yield yield2 expenses frnt_ld rear_ld turnover mgmt_fee expenses2 frnt_ld2 rear_ld2 mgmt_fee2 actual_12b1 actual_12b12 stock institutional_fund retail_fund; 
run;


proc sort data = multiple; by wficn crsp_cl_grp caldt mtna; 

data mgmt_co; set multiple; by wficn crsp_cl_grp caldt mtna;
  if last.caldt;
keep wficn crsp_cl_grp caldt msa_name crsp_fundno crsp_cl_grp fund_name adv_name ticker mgr_name mgmt_no mgmt_no2 crsp_obj_cd crsp_obj_cd2 lipper_obj_cd si_obj_cd lipper_class wbrger_obj_cd lipper_asset_cd fiscal_yearend index_fund index_fund2 et et2; * ioc mgrcoab team_managed sole_managed co_managed anonymously_managed ;

proc sort data = multiple; by wficn crsp_cl_grp caldt fund_age; 

data age; set multiple; by wficn crsp_cl_grp caldt fund_age;
  if last.caldt;
keep wficn crsp_cl_grp caldt fund_age;

data multiple2; merge sum2 mean2 mgmt_co age; by wficn crsp_cl_grp caldt;
  
data fundinfo_qtr; set uniq multiple2;

*** concentration of mutual funds by MSA;
proc sort data = fundinfo_qtr; by msa_name caldt;

proc means noprint data = fundinfo_qtr; by msa_name caldt;
  var mtna; where msa_name ne '';
  output out = msa_level (drop=_type_ _freq_) sum = mtna_msa n = num_funds_msa;

proc sort data = msa_level; by caldt descending mtna_msa;

data msa_level; set msa_level; by caldt descending mtna_msa;
  if first.caldt then rank_mtna_msa = 0; 
  rank_mtna_msa+1;

proc sort data = msa_level; by caldt descending num_funds_msa;

data msa_level; set msa_level; by caldt descending num_funds_msa;
  if first.caldt then rank_nfunds_msa = 0; 
  rank_nfunds_msa+1;

proc rank data = msa_level out = msa_level groups = 10; by caldt;
  var mtna_msa num_funds_msa;
  ranks dec_mtna_msa dec_num_funds_msa;

proc sort; by msa_name caldt;

data fundinfo_qtr; merge fundinfo_qtr msa_level; by msa_name caldt;
run;

*** family size;
proc sort data = fundinfo_qtr; by mgmt_no2 caldt;

proc means data = fundinfo_qtr noprint; by mgmt_no2 caldt;
  var mtna; where mtna ne . and mgmt_no2 ne '';
  output out = family_tna (drop=_type_ _freq_) sum = family_tna n = num_funds;

data fundinfo_qtr2; merge fundinfo_qtr (in=a) family_tna; by mgmt_no2 caldt; if a;
  if family_tna = . then do; family_tna = mtna; num_funds = 1; end;
  family_tna_ex = family_tna-mtna;
run;




******************************************** Portfolio characteristics ********************************************;
******************************************** Portfolio characteristics ********************************************;
******************************************** Portfolio characteristics ********************************************;
proc sort data = temp.s12_holdings (keep=wficn rdate fdate) nodupkey out = s12_dates; by wficn rdate fdate;
run;

proc sql;
  create table fundinfo_qtr2x as
  select a.*, b.rdate, b.fdate
  from fundinfo_qtr2 as a left join s12_dates as b
  on a.wficn = b.wficn and 0 <= intck('month', b.rdate, a.caldt) <= 6;
quit;

proc sort data = fundinfo_qtr2x; by wficn crsp_cl_grp caldt descending rdate fdate; 

data fundinfo_qtr2x; set fundinfo_qtr2x; by wficn crsp_cl_grp caldt descending rdate; 
  if first.caldt;
run;

proc sql;
  create table port1a as
  select a.*, b.permno, b.market_value, b.shares_adj as nbr_shares, b.wt_adj as wt, b.market_value/(a.mtna*1000000) as percent_tna
  from fundinfo_qtr2x (keep=wficn crsp_cl_grp caldt rdate fdate mtna) as a, temp.s12_holdings as b
  where a.wficn = b.wficn and a.rdate = b.rdate and a.fdate = b.fdate;
quit;



/* ********************************************************************************** */
/* ******* Prepare data to compute size, value, and momentum scores for stocks ********/
/* ********************************************************************************** */
proc sort data=comp.funda (keep=gvkey cusip conm datadate datafmt indfmt popsrc consol fyear fyr sich naicsh fic xad cik act lct epspx emp
  che at dltt lt pstkl pstk sale oibdp dp ib dvc csho dlc txditc cogs xrd pstkrv ceq txdb dcvt lt xsga prcc_f ppent xint txt capx aqc curcd) out=cst nodupkey;
  where at > 0 and sale >= 0 and 2018 >= fyear >= 2001 and indfmt= 'INDL' and datafmt= 'STD' and popsrc= 'D' and consol= 'C' and curcd = 'USD'; 
  by gvkey fyear;
run;

data cst; set cst;
  if fyr gt 0;
  cyear = fyear;
  if fyr le 5 and fyr gt 0 then cyear = cyear+1;
  fyrenddt = intnx('month', mdy(fyr, 28, cyear), 0, 'end');
  format fyrenddt mmddyyn8.;
  drop indfmt datafmt popsrc consol curcd;

proc sql;
  create table cst_matched as 
  select a.*, b.lpermno as permno, b.lpermco as permco, b.linktype, b.linkprim, b.liid
  from cst as a, crspcc.ccmxpf_linktable as b
  where a.gvkey = b.gvkey and linktype in ("LC", "LN", "LU", "LX", "LD", "LS") and
        usedflag in (1) and (fyrenddt >= linkdt or linkdt = .B) and (fyrenddt <= linkenddt or linkenddt = .E)
  order by gvkey, fyrenddt, lpermno, lpermco, linkdt, linkenddt; 
quit; 

proc sort data = cst_matched; by gvkey permco permno fyear linktype descending linkprim liid;

data cst_matched1; set cst_matched; by gvkey permco permno fyear linktype descending linkprim liid;
  if first.fyear;
run;

data report_dates1a; set s12.holdings;
  mend = intnx('month', rdate, 0, 'end');
  format mend date9.;
  keep mend;

proc sort data = temp.holdings out = report_dates1b (keep=report_dt) nodupkey; by report_dt;

data report_dates1b; set report_dates1b;
  mend = intnx('month', report_dt, 0, 'end');
  format mend date9.;
  keep mend;

data report_dates; set report_dates1a report_dates1b;

proc sort nodupkey; by mend; where mend ne .;run;
 
proc sort data=crsp.stocknames (keep= permno permco comnam namedt nameenddt shrcd ticker exchcd siccd) out = permnolist nodupkey;
  by permno namedt nameenddt; where shrcd in (10,11,12) and exchcd in (1, 2, 3);
run;

proc sql;
  create table permnolist as 
  select a.*, b.mend
  from permnolist as a, report_dates as b
  where a.namedt <= b.mend <= a.nameenddt; 
quit; 

proc sql undo_policy = none;
	create table cst_list as select distinct
	a.*, b.fyrenddt, b.gvkey
	from permnolist a, cst_matched1 b
	where (a.permno = b.permno) and 3 <= intck('month', b.fyrenddt, a.mend) <= 18; 

	* add market cap, shares outstanding, number of share class at fyrenddt;
	create table cst_list as select distinct
	a.*, (abs(b.prc)*b.shrout/1000) as mve_crsp_permno, abs(b.prc) as prc_crsp_permno, b.shrout as shrout_permno, b.date as mve_date 
	from cst_list a left join crsp.msf b
	on (a.permno = b.permno) and intck('month', b.date, a.mend) = 0; 

	* merge with CRSP;
	create table cst_matched1b as select 
	*
	from cst_list as a left join cst_matched1 as b
	on a.fyrenddt = b.fyrenddt and a.permno =  b.permno and a.gvkey = b.gvkey; 
quit;

proc sort data = cst_matched1b nodupkey; by gvkey mend permco permno fyrenddt; **** DUAL-class firms show up as two observations in this dataset;

data cst_matched1b; set cst_matched1b; by gvkey mend permco permno fyrenddt;
  if last.permno; 

proc means data = cst_matched1b noprint; by gvkey mend permco; 
  var mve_crsp_permno shrout_permno; 
  output out = cst_combined (drop=_type_ _freq_) sum = mve_crsp shrout;
run;

proc sql;
  create table cst_matched2 as select 
  *
  from cst_matched1b a, cst_combined b
  where a.gvkey = b.gvkey and a.mend = b.mend and a.permco = b.permco; 
quit;

data cst_matched2a; set cst_matched2;
  if permco ne .;
  fyme = prcc_f*csho;
  pct_diff = mve_crsp/fyme - 1;
  fyme2 = fyme;
  if fyme < mve_crsp then fyme2 = mve_crsp;
  ratio = csho*1000/shrout;
  bm = ceq/mve_crsp;

  name_dis = min(spedis(conm, comnam), spedis(comnam, conm));

proc sort data = cst_matched2a nodupkey; by gvkey permco mend permno; run;

* calculate the stock return during a 12-month period ending the report date;
proc sql;
  create table ret as
  select a.*, log(1+b.ret) as logret, b.ret, b.date
  from cst_matched2a (keep=gvkey permco mend permno) as a, crsp.msf as b
  where a.permno = b.permno and 0 <= intck('month', b.date, a.mend) <= 11;
quit;

proc sort; by gvkey permco mend permno; 

proc means noprint; by gvkey permco mend permno; 
  var logret ret;
  output out = ret_pr12m (drop=_type_ _freq_) sum = clogret car_pr12m;

data cst_matched2a; merge cst_matched2a ret_pr12m;
  bhret_pr12m = exp(clogret)-1;
  drop clogret;

proc means n mean median min max; 
  var bhret_pr12m car_pr12m bm mve_crsp;

proc sort; by mend;

proc rank data = cst_matched2a out = size_cutoff groups = 5; by mend;
  var mve_crsp; where exchcd = 1;
  ranks r_mve_crsp; 
********* We form the book-to-market and the momentum quintiles by dividing the stocks equally into the five  *********
********* groups. On the other hand, we form the size quintiles by using cut-offs from the NYSE only. *****************;

proc sort; by mend r_mve_crsp;

proc means noprint; by mend r_mve_crsp;
  var mve_crsp;
  output out = size_cutoff (drop=_type_ _freq_) min=size_min max=size_max;

data size_cutoff1; set size_cutoff;
  if r_mve_crsp = 0;
  size1 = size_max;
  keep mend size1;

data size_cutoff2; set size_cutoff;
  if r_mve_crsp = 1;
  size2 = size_max;
  keep mend size2;

data size_cutoff3; set size_cutoff;
  if r_mve_crsp = 2;
  size3 = size_max;
  keep mend size3;

data size_cutoff4; set size_cutoff;
  if r_mve_crsp = 3;
  size4 = size_max;
  keep mend size4;

data size_cutoffs; merge size_cutoff1 size_cutoff2 size_cutoff3 size_cutoff4; by mend;

proc rank data = cst_matched2a out = cst_matched2a groups = 5; by mend;
  var bm bhret_pr12m;
  ranks r_bm r_bhret_pr12m;

data temp.stocks_char; merge cst_matched2a size_cutoffs; by mend;
  if mve_crsp > . and mve_crsp <= size1 then r_size = 1; else
  if mve_crsp <= size2 then r_size = 2; else
  if mve_crsp <= size3 then r_size = 3; else
  if mve_crsp <= size4 then r_size = 4; else
  r_size = 5;

  r_btm = r_bm+1;
  r_ret = r_bhret_pr12m+1;
  drop r_bm r_bhret_pr12m;

  keep permno mend r_size r_btm r_ret mve_crsp bm bhret_pr12m;
run;


/* ********************************************************************************** */
/* *********** End of computing size, value, and momentum scores for stocks ***********/
/* ********************************************************************************** */



proc sql;
  create table port1a as
  select a.*, b.r_size, b.r_btm, b.r_ret, log(b.mve_crsp) as log_size, log(b.bm) as log_bm, b.bhret_pr12m
  from port1a as a left join temp.stocks_char as b
  on a.permno = b.permno and year(a.rdate) = year(b.mend) and month(a.rdate) = month(b.mend); 
quit;



***** LINK TO CRSPMF holdings;
data fundinfo_qtr2y; set fundinfo_qtr2x;
  if rdate = .; 

proc sort data = fundinfo_qtr2y out = no_s12 (keep=wficn crsp_cl_grp caldt) nodupkey; by crsp_cl_grp caldt;  run;

proc sort data = temp.holdings out = fund_dates (keep=crsp_cl_grp report_dt) nodupkey; by crsp_cl_grp report_dt;

proc sql;
  create table no_s12 as
  select a.*, b.report_dt as rdate
  from no_s12 as a left join fund_dates as b
  on a.crsp_cl_grp = b.crsp_cl_grp and 0 <= intck('month', b.report_dt, a.caldt) <= 6;
quit;

proc sort data = no_s12; by crsp_cl_grp caldt rdate;

data no_s12; set no_s12; by crsp_cl_grp caldt rdate;
  if last.caldt and rdate ne . and crsp_cl_grp ne .;
run;

proc sql;
  create table fundinfo_qtr2y as
  select a.*, b.rdate
  from fundinfo_qtr2y (drop=rdate fdate) as a left join no_s12 as b
  on a.crsp_cl_grp = b.crsp_cl_grp and a.caldt = b.caldt;
quit;
 
proc sql;
  create table port1b as
  select a.*, b.permno, b.nbr_shares, b.wt, b.market_value, b.percent_tna
  from fundinfo_qtr2y as a, temp.holdings as b
  where a.crsp_cl_grp = b.crsp_cl_grp and a.rdate = b.report_dt;
quit;run; * temp.holdings is from 02Holdings.sas;
    
proc sql;
  create table port1b as
  select a.*, b.r_size, b.r_btm, b.r_ret, log(b.mve_crsp) as log_size, log(b.bm) as log_bm, b.bhret_pr12m
  from port1b as a left join temp.stocks_char as b
  on a.permno = b.permno and year(a.rdate) = year(b.mend) and month(a.rdate) = month(b.mend); 
quit;
run;

data port2; set port1a port1b;
  if market_value > 0;
run;


***** calculate holdings characteristics;
proc sort data = port2; by wficn crsp_cl_grp caldt rdate mtna; 

proc means data = port2 noprint; by wficn crsp_cl_grp caldt rdate mtna;
  var r_size r_btm r_ret log_size log_bm bhret_pr12m; weight market_value; where market_value > 0; 
  output out = holdings_char (drop=_type_ _freq_) mean = size_score value_score mom_score mean_log_size mean_log_bm mean_return;
run;


*** retrieve common stocks;
proc sort data=crsp.stocknames(keep=permco permno ncusip cusip ticker comnam namedt nameenddt st_date end_date shrcd exchcd where=(not missing(ncusip))) 
  out=permno nodupkey; by permno namedt nameenddt;
run;

proc sql;
  create table port2 as
  select a.*, b.shrcd, b.exchcd
  from port2 as a left join permno as b
  on a.permno = b.permno and b.namedt <= a.caldt <= b.nameenddt and shrcd in (10, 11, 12);
quit;

proc sort data = port2; by wficn crsp_cl_grp caldt;

proc means data = port2 noprint; by wficn crsp_cl_grp caldt;
  var market_value; 
  output out = port_sum1a (drop=_type_ _freq_) sum = port_value_total;

proc means data = port2 noprint; by wficn crsp_cl_grp caldt;
  var market_value; where shrcd in (10, 11, 12);
  output out = port_sum1b (drop=_type_ _freq_) sum = port_value_common;

data port2_; merge port2 port_sum1a port_sum1b; by wficn crsp_cl_grp caldt; 
  if wt >= .05 then block5 = 1; else if wt ne . then block5 = 0;
  if wt >= .01 then block1 = 1; else if wt ne . then block1 = 0;
  port_wt = market_value/port_value_common;
  pwt = market_value/port_value_total;
run;

proc sort data = port2_; by wficn crsp_cl_grp caldt;

proc means data = port2_ noprint; by wficn crsp_cl_grp caldt;
  var permno wt block5 block1; where shrcd in (10, 11, 12);
  output out = port2a (drop=_type_ _freq_) n(permno) = num_stocks mean(wt) = wt mean(block5) = block5 mean(block1) = block1; 

proc means n mean median min p1 p5 p95 p99 max;
run;

proc means data = port2_ noprint; by wficn crsp_cl_grp caldt;
  var permno wt; 
  output out = port2b (drop=_type_ _freq_) n(nbr_shares) = num_securities; 

proc sort data = port2_ out = port2x; by wficn crsp_cl_grp caldt descending market_value; where shrcd in (10, 11, 12);

data port2x; set port2x; by wficn crsp_cl_grp caldt; 
  if first.caldt then rank = 0; 
  rank+1;

proc means data = port2x noprint; by wficn crsp_cl_grp caldt;
  var market_value; where 1 <= rank <= 10;
  output out = port2c (drop=_type_ _freq_) sum = market_val_top10;

proc means data = port2x noprint; by wficn crsp_cl_grp caldt;
  var market_value; 
  output out = port2x (drop=_type_ _freq_) sum = market_val_all;

data temp.port_char; merge port2a port2b port2c port2x holdings_char port_sum1b; by wficn crsp_cl_grp caldt;
  if mtna > 0 then pct_common_eq = port_value_common/(mtna*1000000);

proc sort data = holdings_char nodupkey; by wficn crsp_cl_grp caldt;
run;


proc sort data = fundinfo_qtr2; by wficn crsp_cl_grp caldt;

data temp.fundinfo; merge fundinfo_qtr2 temp.port_char; by wficn crsp_cl_grp caldt;
  top10_conc = market_val_top10/market_val_all;

proc sort data = temp.fundinfo nodupkey; by wficn crsp_cl_grp caldt; 
run;


/* ********************************************************************************** */
/* ***************************** Start of computing fund flows ************************/
/* ********************************************************************************** */

proc sort data = crspmf.Monthly_tna_ret_nav out = mfmrna nodupkey; by crsp_fundno descending caldt;

data mfmrna1; set mfmrna; by crsp_fundno descending caldt;
  set mfmrna (firstobs = 2 keep = mret mtna caldt rename=(mret=lagret mtna=lagtna caldt=lagcaldt));
  if last.crsp_fundno or intck('month', lagcaldt, caldt) ne 1 then do;
    lagret = .; lagtna = .;
  end;

proc sort data = mfmrna; by crsp_fundno caldt;

data start_dt; set mfmrna; by crsp_fundno;
  if first.crsp_fundno;
  start_dt = caldt;
  format start_dt date9.;
  keep crsp_fundno start_dt;
run;

data mfmrna1; merge mfmrna1 start_dt; by crsp_fundno;

proc freq data = mfmrna1;
  tables caldt;
run;

/* get crsp_fundno mergers */
proc sort data = crspmf.fund_summary2 (keep=merge_fundno crsp_fundno fund_name end_dt) out = merge1 nodupkey; by crsp_fundno merge_fundno;
  where merge_fundno ne .; * merge_fundno is the fundno for acquirers;
run;

proc sql;
  create table merge1a as
  select a.*, b.mtna, b.caldt
  from merge1 as a left join crspmf.monthly_tna_ret_nav as b
  on a.crsp_fundno = b.crsp_fundno and -6 <= intck('month', b.caldt, a.end_dt) <= 6;
quit;

proc sort data = merge1a; by crsp_fundno merge_fundno caldt; where mtna ne .;

data merge2; set merge1a; by crsp_fundno merge_fundno caldt;
  if last.merge_fundno;
run;
* merge2 contain crsp_fundno that are being acquired by merge_fundno;

proc sql;
  create table fundret2a as
  select a.*, b.mtna as merge_mtna, b.crsp_fundno as crsp_fundno_target, b.fund_name as fund_target
  from mfmrna1 as a left join merge2 as b
  on a.crsp_fundno = b.merge_fundno and intck('month', b.end_dt, a.caldt) = 1;
quit;

proc sort data = fundret2a; by crsp_fundno descending caldt;

data fundret2b; set fundret2a; by crsp_fundno descending caldt;
  if merge_mtna = . then merge_mtna = 0;
  if first.caldt then total_merge = 0;
  if merge_mtna > 0 then total_merge + merge_mtna; 
    * may have multiple funds merged into the acquirer;

  if first_offer_dt > start_dt and start_dt ne . then first_offer_dt = start_dt;
  if first_offer_dt = . then first_offer_dt = start_dt;
  age = intck('month', first_offer_dt, caldt);
  age12 = int((age-1)/12);
  if lagtna = -99 then lagtna = .;
  if mtna = -99 then mtna = .;
  if last.caldt then output;
  drop merge_mtna crsp_fundno_target fund_target;
run;

data fundno2wficn; set mflinks.mflink1;
  if crsp_fundno ne . and wficn ne .;

proc sql;
  create table fundmret1 as
  select a.*, b.wficn
  from fundret2b as a left join fundno2wficn as b
  on a.crsp_fundno = b.crsp_fundno;
quit;

proc sql;
  create table fundmret1 as
  select a.*, b.crsp_cl_grp
  from fundmret1 as a left join crspmf.fund_names as b
  on a.crsp_fundno = b.crsp_fundno and b.chgdt <= a.caldt <= b.chgenddt;
quit;

proc sort data = fundmret1 nodupkey; by wficn crsp_fundno caldt;

proc means data = fundmret1 n min p99 max; class caldt;
  var lagtna mtna mret; 
run; 


* Aggregate to fund level;
proc sort data = fundmret1; by wficn crsp_cl_grp caldt; 

data unique; set fundmret1; by wficn crsp_cl_grp caldt;
  if first.caldt and last.caldt;
  if lagtna > 0 then flow = (mtna-(1+mret)*lagtna-total_merge)/lagtna;
run;

data multiple; set fundmret1; by wficn crsp_cl_grp caldt;
  if not (first.caldt and last.caldt);

proc sort; by wficn crsp_cl_grp caldt mtna;

data other; set multiple; by wficn crsp_cl_grp caldt;
  if last.caldt;
  keep wficn crsp_cl_grp caldt crsp_fundno first_offer_dt age age12 start_dt;

proc means data = multiple noprint; by wficn crsp_cl_grp caldt;
  var mtna lagtna total_merge;
  output out = multiple1a (drop=_type_ _freq_) sum=mtna lagtna total_merge;

proc means data = multiple noprint; by wficn crsp_cl_grp caldt;
  var mret; weight lagtna; where lagtna not missing;
  output out = multiple1b (drop=_type_ _freq_) mean=mret;

data multiple2; merge multiple1a multiple1b; by wficn crsp_cl_grp caldt;
  if lagtna > 0 then flow = (mtna-(1+mret)*lagtna-total_merge)/lagtna; 
    
data multiple2; merge other multiple2; by wficn crsp_cl_grp caldt;

data fundmret2; set unique multiple2;

proc sql;
  create table fundmret2 as
  select a.*, b.crsp_obj_cd
  from fundmret2 as a, crspmf.fund_summary2 as b
  where a.crsp_fundno = b.crsp_fundno and intck('quarter', a.caldt, b.caldt) in (0) and b.summary_period = 'Q' and crsp_obj_cd ne '';
quit;

proc sort data = fundmret2; by crsp_fundno caldt;

data fundmret2; set fundmret2; by crsp_fundno caldt;
  retain lcrsp_obj_cd;
  if crsp_obj_cd ne '' then lcrsp_obj_cd=crsp_obj_cd; 
  if first.crsp_fundno and crsp_obj_cd = '' then lcrsp_obj_cd = ''; 
  crsp_obj_cd2=crsp_obj_cd; 
  if not first.crsp_fundno and crsp_obj_cd = '' and lcrsp_obj_cd ne '' then crsp_obj_cd2 = lcrsp_obj_cd; 
  drop lcrsp_obj_cd;

proc sort data = fundmret2; by crsp_fundno descending caldt;

data fundmret2; set fundmret2; by crsp_fundno descending caldt;
  retain lcrsp_obj_cd;
  if crsp_obj_cd ne '' then lcrsp_obj_cd=crsp_obj_cd; 
  if first.crsp_fundno and crsp_obj_cd = '' then lcrsp_obj_cd = ''; 
  crsp_obj_cd2=crsp_obj_cd; 
  if not first.crsp_fundno and crsp_obj_cd = '' and lcrsp_obj_cd ne '' then crsp_obj_cd2 = lcrsp_obj_cd; 
  drop lcrsp_obj_cd;

proc sort data = fundmret2; by crsp_obj_cd2 caldt;

proc means noprint data = fundmret2; by crsp_obj_cd2 caldt;
  var mret flow; weight lagtna; where lagtna > 0; 
  output out = style (drop=_type_ _freq_) mean = style_mret style_flow;
run;

data flow_monthly; merge fundmret2 style; by crsp_obj_cd2 caldt;
  mret_style_adj = mret-style_mret;
  flow_style_adj = flow-style_flow;
  mret_style_adj2 = mret_style_adj*mret_style_adj;
  log_flow = log(1+flow);
  log_style_flow = log(1+style_flow);
  year = year(caldt);
  qtr = intnx('quarter', caldt, 0, 'end');
  format qtr date9.;

proc sort data = flow_monthly; by wficn crsp_cl_grp year;

proc means data = flow_monthly noprint; by wficn crsp_cl_grp year;
  var flow_style_adj flow;
  output out = flow_annual2 (drop=_type_ _freq_) mean = flow_style_adj flow std = flow_style_adj_std flow_std sum(log_flow) = clog_flow;

data flow_annual2; set flow_annual2;
  flow_comp = exp(clog_flow)-1;

%winsor(dsetin=flow_annual2, dsetout=temp.flow_annual, byvar=none, vars=flow_style_adj flow_style_adj_std flow_std flow_comp, type=winsor, pctl=1 99); 

proc means data = temp.flow_annual n mean median min max p1 p99;
  var flow_style_adj flow flow_comp flow_std;
run;

proc sort data = flow_monthly (keep=wficn crsp_cl_grp qtr) nodupkey out = qtrly; by wficn crsp_cl_grp qtr;

proc sql;
  create table fundmret2 as
  select a.*, b.flow_style_adj
  from qtrly as a, flow_monthly as b
  where a.wficn = b.wficn and a.crsp_cl_grp = b.crsp_cl_grp and 0 <= intck('month', b.caldt, a.qtr) <= 11;
quit;

proc sort; by wficn crsp_cl_grp qtr;

proc means noprint; by wficn crsp_cl_grp qtr;
  var flow_style_adj;
  output out = temp.flow_qtrly (drop=_type_ _freq_) sum(flow_style_adj) = flow_style_adj;  *** Style-adjusted flow in the last 12 months, on a quarterly frequency;
run;

/* ********************************************************************************** */
/* ****************************** End of computing fund flows *************************/
/* ********************************************************************************** */






**********************************************************************************************************;
************************** Start of computing style-adjusted returns *************************************;
**********************************************************************************************************;
proc sql;
  create table monthly_tna_ret_nav as
  select a.crsp_fundno, intnx('month', a.caldt, 0, 'end') as caldt, a.mret, b.crsp_obj_cd
  from crspmf.monthly_tna_ret_nav as a, crspmf.fund_summary2 as b
  where a.crsp_fundno = b.crsp_fundno and intck('quarter', a.caldt, b.caldt) in (0) and b.summary_period = 'Q' and year(a.caldt) >= 1998; 
quit;

proc sort data = monthly_tna_ret_nav; by crsp_fundno caldt;

data monthly_tna_ret_nav; set monthly_tna_ret_nav; by crsp_fundno caldt;
  retain lcrsp_obj_cd;
  if crsp_obj_cd ne '' then lcrsp_obj_cd=crsp_obj_cd; 
  if first.crsp_fundno and crsp_obj_cd = '' then lcrsp_obj_cd = ''; 
  crsp_obj_cd2=crsp_obj_cd; 
  if not first.crsp_fundno and crsp_obj_cd = '' and lcrsp_obj_cd ne '' then crsp_obj_cd2 = lcrsp_obj_cd; 
  drop lcrsp_obj_cd;

proc sort data = monthly_tna_ret_nav; by crsp_fundno descending caldt;

data monthly_tna_ret_nav; set monthly_tna_ret_nav; by crsp_fundno descending caldt;
  retain lcrsp_obj_cd;
  if crsp_obj_cd ne '' then lcrsp_obj_cd=crsp_obj_cd; 
  if first.crsp_fundno and crsp_obj_cd = '' then lcrsp_obj_cd = ''; 
  crsp_obj_cd2=crsp_obj_cd; 
  if not first.crsp_fundno and crsp_obj_cd = '' and lcrsp_obj_cd ne '' then crsp_obj_cd2 = lcrsp_obj_cd; 
  drop lcrsp_obj_cd;

proc sql;
  create table monthly_tna_ret_nav as
  select a.*, b.mtna as mtna_pr1
  from monthly_tna_ret_nav as a left join crspmf.monthly_tna_ret_nav as b
  on a.crsp_fundno = b.crsp_fundno and intck('month', b.caldt, a.caldt) = 1;
quit;

proc sort data = monthly_tna_ret_nav; by crsp_obj_cd2 caldt;

proc means noprint data = monthly_tna_ret_nav; by crsp_obj_cd2 caldt;
  var mret; weight mtna_pr1; where mtna_pr1 ne .;
  output out = style_ret (drop=_type_ _freq_) mean = mret_style;

data temp.style_adj_ret; merge monthly_tna_ret_nav style_ret; by crsp_obj_cd2 caldt;
  mret_style_adj = mret-mret_style;
  format caldt date9.;

proc means data = temp.style_adj_ret n mean median min p1 p5 p10 p25 p75 p90 p95 p99 max;
  var mret_style_adj mret mret_style; 
run;
* temp.style_adj_ret contains style-adjusted returns for crsp_fundno at a monthly frequency;

***********************************************;

* Annual style-adjusted returns;
data fundno2wficn; set mflinks.mflink1;
  if crsp_fundno ne . and wficn ne .;

proc sql;
  create table fundret as
  select a.*, b.mret, b.mret_style_adj, b.caldt
  from fundno2wficn as a left join temp.style_adj_ret as b
  on a.crsp_fundno = b.crsp_fundno;
quit;

proc sql;
  create table fundret as
  select a.*, b.crsp_cl_grp
  from fundret as a left join crspmf.fund_names as b
  on a.crsp_fundno = b.crsp_fundno and b.chgdt <= a.caldt <= b.chgenddt;
quit;

proc sort data = fundret nodupkey; by wficn crsp_fundno caldt;

proc freq data = fundret; tables caldt;
run;

proc sql;
  create table fundret as
  select a.*, b.exp_ratio as expenses
  from fundret as a left join crspmf.Fund_Fees as b
  on a.crsp_fundno = b.crsp_fundno and a.caldt between b.begdt and b.enddt;
quit;

proc sql;
  create table fundret as
  select a.*, b.mtna as mtna_pr1
  from fundret as a left join crspmf.monthly_tna_ret_nav as b
  on a.crsp_fundno = b.crsp_fundno and intck('month', b.caldt, a.caldt) = 1;
quit;

data fundret; set fundret; 
  year = year(caldt);
  if mtna_pr1 <= -99 then mtna_pr1 = .;
  if expenses <= -99 then expenses = .;

proc sort data = fundret; by crsp_fundno caldt;

data fundret; set fundret; by crsp_fundno caldt;
  retain lexpenses;
  if expenses ne . then lexpenses=expenses; 
  if first.crsp_fundno and expenses = . then lexpenses = .; 
  expenses2=expenses; 
  if not first.crsp_fundno and expenses = . and lexpenses ne . then expenses2 = lexpenses; 
  drop lexpenses;

proc sort data = fundret; by crsp_fundno descending caldt;

* rsubmit;
data fundret; set fundret; by crsp_fundno descending caldt;
  retain lexpenses;
  if expenses ne . then lexpenses=expenses; 
  if first.crsp_fundno and expenses = . then lexpenses = .; 
  if expenses2 = . then expenses2=expenses; 
  if not first.crsp_fundno and expenses = . and lexpenses ne . and expenses2 = . then expenses2 = lexpenses; 
  drop lexpenses;

  gret = mret+expenses2/12;

proc means data = fundret n mean median p5 p95 p25 p75 min p1 p99 max;
  var mret gret expenses expenses2; run;

proc sort data = fundret nodupkey; by wficn crsp_cl_grp year caldt crsp_fundno; run;

proc means noprint data = fundret; by wficn crsp_cl_grp year caldt;
  var mret gret mret_style_adj; weight mtna_pr1; where mtna_pr1 ne .;
  output out = temp.fundret_monthly (drop=_type_ _freq_) mean = mret gret mret_style_adj SUMWGT = mtna_pr1;

data temp.fundret_monthly; set temp.fundret_monthly;
  log_mret = log(1+mret);
run;
********************************************************************************************************************************************************************;
* temp.fundret_monthly contains fund (style-adjusted) returns at the fund(wficn)-month level ******************************************************************************;
********************************************************************************************************************************************************************;


data quarterly; set temp.fundret_monthly;
  qtr = intnx('quarter', caldt, 0, 'end');
  format qtr date9.;

proc sort nodupkey data = quarterly (keep=wficn crsp_cl_grp qtr); by wficn crsp_cl_grp qtr;

proc sql;
  create table quarterly as
  select a.*, b.mret, b.gret, b.mret_style_adj, b.log_mret
  from quarterly as a left join temp.fundret_monthly as b
  on a.wficn = b.wficn and a.crsp_cl_grp = b.crsp_cl_grp and 0 <= intck('month', b.caldt, a.qtr) <= 11;
quit;

proc sort data = quarterly; by wficn crsp_cl_grp qtr;

proc means noprint data = quarterly; by wficn crsp_cl_grp qtr;
  var mret gret mret_style_adj; 
  output out = temp.fundret_qtr_annual (drop=_type_ _freq_) mean = mret gret mret_style_adj std = mret_std gret_std mret_style_adj_std n(mret_style_adj) = num_months;
run;
********************************************************************************************************************************************************************;
* temp.fundret_qtr_annual contains fund returns during the 12-month period ending in a given quarter-end at the fund(wficn)-quarter level ******************************************************************************;
********************************************************************************************************************************************************************;


proc freq data = temp.fundret_monthly; tables caldt;
run;


proc means noprint data = temp.fundret_monthly; by wficn crsp_cl_grp year;
  var mret gret mret_style_adj; 
  output out = fundret_annual (drop=_type_ _freq_) mean = mret gret mret_style_adj std = mret_std gret_std mret_style_adj_std sum(log_mret) = clog_mret n(mret_style_adj) = num_months;

data fundret_annual; set fundret_annual;
  ret_comp = exp(clog_mret)-1;
run;
run;




******************************************************************** FF4-adjusted **********************************************************************************;
******************************************************************** FF4-adjusted **********************************************************************************;
proc sort data = fundret_annual (keep=wficn crsp_cl_grp year) out = fund_dates nodupkey; by wficn crsp_cl_grp year;

data fund_dates; set fund_dates;
  st_date = mdy(1,31,year); 
  format st_date date9.;

proc sql;
  create table fund_beta as
  select a.wficn, a.crsp_cl_grp, a.year, b.caldt, b.mret, b.gret, b.mtna_pr1
  from fund_dates as a, temp.fundret_monthly as b
  where a.wficn = b.wficn and a.crsp_cl_grp = b.crsp_cl_grp and 1 <= intck('month', b.caldt, a.st_date) <= 30;
quit;

*** BETAs;
proc sql;
  create table fund_beta as
  select a.*, b.hml, b.smb, b.mktrf, b.umd, b.rf, a.mret-b.rf as mretx, a.gret-b.rf as gretx 
  from fund_beta as a, ff.factors_monthly as b
  where year(a.caldt) = year(b.dateff) and month(a.caldt) = month(b.dateff);
quit;

proc sort; by wficn crsp_cl_grp year;

proc means data = fund_beta noprint; by wficn crsp_cl_grp year;
  var mret; where mret > .;
  output out = count (drop=_type_ _freq_) n = num_months_beta;
run;

proc reg data = fund_beta noprint outest=mktbeta(keep=wficn crsp_cl_grp year intercept mktrf);
  by wficn crsp_cl_grp year; where mretx > .;
  model mretx = mktrf;

data mktbeta; set mktbeta;
  alpha_m = intercept; beta = mktrf;
  drop mktrf intercept;

proc reg data = fund_beta noprint outest=ff4beta(keep=wficn crsp_cl_grp year intercept mktrf smb hml umd); *;
  by wficn crsp_cl_grp year; where mretx > .;
  model mretx = mktrf smb hml umd; *;

data ff4beta; set ff4beta;
  alpha_ff4 = intercept; betam4 = mktrf; betas4 = smb; betah4 = hml; betau4 = umd;
  drop intercept mktrf smb hml umd;

proc reg data = fund_beta noprint outest=ff4beta2(keep=wficn crsp_cl_grp year intercept mktrf smb hml umd); *;
  by wficn crsp_cl_grp year; where gretx > .;
  model gretx = mktrf smb hml umd; *;

data ff4beta2; set ff4beta2;
  alpha_ff4g = intercept; betam4g = mktrf; betas4g = smb; betah4g = hml; betau4g = umd;
  drop intercept mktrf smb hml umd;

data fund_betas; merge mktbeta ff4beta ff4beta2 count; by wficn crsp_cl_grp year; 
  keep wficn crsp_cl_grp year alpha_m beta alpha_ff4 betam4 betas4 betah4 betau4 alpha_ff4g betam4g betas4g betah4g betau4g num_months_beta; *;

proc sort data = fund_betas; by wficn crsp_cl_grp year;

proc means data = fund_betas n mean median min max p1 p99;
run;


***** Abnormal fund return in year t;
proc sql;
  create table fund_abnret as
  select a.wficn, a.crsp_cl_grp, a.year, b.caldt, b.mret, b.gret, b.mtna_pr1
  from fund_dates as a left join temp.fundret_monthly as b
  on a.wficn = b.wficn and a.crsp_cl_grp = b.crsp_cl_grp and a.year=year(b.caldt);
quit;

proc sql;
  create table fund_abnret as
  select a.*, b.hml, b.smb, b.mktrf, b.umd, b.rf, a.mret-b.rf as mretx, a.gret-b.rf as gretx 
  from fund_abnret as a, ff.factors_monthly as b
  where year(a.caldt) = year(b.dateff) and month(a.caldt) = month(b.dateff);
quit;

proc sort; by wficn crsp_cl_grp year;

data fund_abnret2; merge fund_abnret fund_betas; by wficn crsp_cl_grp year;
  ret_capm = mret-rf-beta*mktrf; 
  retff4 = mret-rf-betam4*mktrf-betah4*hml-betas4*smb-betau4*umd; 
  gretff4 = gret-rf-betam4g*mktrf-betah4g*hml-betas4g*smb-betau4g*umd; 

proc sort; by wficn crsp_cl_grp year num_months_beta;

proc means data = fund_abnret2 noprint; by wficn crsp_cl_grp year num_months_beta;
  var mret mretx ret_capm retff4 gretff4; 
  output out = fund_abnret3 (drop=_type_ _freq_) sum = car_mret car_mretx car_capm car_ff4 gcar_ff4 mean = mret mretx ret_capm retff4 gretff4 n(retff4) = num_months;

data temp.fund_abnret_annual; merge fundret_annual fund_abnret3; by wficn crsp_cl_grp year; 
run;

********************************************************************************************************************************************************************;
* temp.fund_abnret_annual contains fund returns during a given year at the fund(wficn)-year level ******************************************************************************;
********************************************************************************************************************************************************************;
